{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Demo of DBSCAN clustering algorithm\n",
    "\n",
    "\n",
    "Finds core samples of high density and expands clusters from them.\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(__doc__)\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.cluster import DBSCAN\n",
    "from sklearn import metrics\n",
    "from sklearn.datasets.samples_generator import make_blobs\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "\n",
    "# #############################################################################\n",
    "# Generate sample data\n",
    "centers = [[1, 1], [-1, -1], [1, -1]]\n",
    "X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,\n",
    "                            random_state=0)\n",
    "\n",
    "X = StandardScaler().fit_transform(X)\n",
    "\n",
    "# #############################################################################\n",
    "# Compute DBSCAN\n",
    "db = DBSCAN(eps=0.3, min_samples=10).fit(X)\n",
    "core_samples_mask = np.zeros_like(db.labels_, dtype=bool)\n",
    "core_samples_mask[db.core_sample_indices_] = True\n",
    "labels = db.labels_\n",
    "\n",
    "# Number of clusters in labels, ignoring noise if present.\n",
    "n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n",
    "\n",
    "print('Estimated number of clusters: %d' % n_clusters_)\n",
    "print(\"Homogeneity: %0.3f\" % metrics.homogeneity_score(labels_true, labels))\n",
    "print(\"Completeness: %0.3f\" % metrics.completeness_score(labels_true, labels))\n",
    "print(\"V-measure: %0.3f\" % metrics.v_measure_score(labels_true, labels))\n",
    "print(\"Adjusted Rand Index: %0.3f\"\n",
    "      % metrics.adjusted_rand_score(labels_true, labels))\n",
    "print(\"Adjusted Mutual Information: %0.3f\"\n",
    "      % metrics.adjusted_mutual_info_score(labels_true, labels))\n",
    "print(\"Silhouette Coefficient: %0.3f\"\n",
    "      % metrics.silhouette_score(X, labels))\n",
    "\n",
    "# #############################################################################\n",
    "# Plot result\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Black removed and is used for noise instead.\n",
    "unique_labels = set(labels)\n",
    "colors = [plt.cm.Spectral(each)\n",
    "          for each in np.linspace(0, 1, len(unique_labels))]\n",
    "for k, col in zip(unique_labels, colors):\n",
    "    if k == -1:\n",
    "        # Black used for noise.\n",
    "        col = [0, 0, 0, 1]\n",
    "\n",
    "    class_member_mask = (labels == k)\n",
    "\n",
    "    xy = X[class_member_mask & core_samples_mask]\n",
    "    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n",
    "             markeredgecolor='k', markersize=14)\n",
    "\n",
    "    xy = X[class_member_mask & ~core_samples_mask]\n",
    "    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),\n",
    "             markeredgecolor='k', markersize=6)\n",
    "\n",
    "plt.title('Estimated number of clusters: %d' % n_clusters_)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
